
############################################################################################################
### Code for replication of: The persistent electoral effects of large-scale infrastructure policies
### Evidence from a rural electrification scheme in Brazil 
### Authors: Victor Araújo, Marta Arrecthe, and Pablo Beramendi						  
############################################################################################################ 

## Required packages to replicate the figures

library(tidyverse) ## baseline functions and operations in R
library(ggplot2) ## to produce plots
library(haven) ## to read files written in dta (STATA) format
library(ggpubr) ## to combine several plots in one figure
library(geobr) ## package to map the Brazilian territory
library(sf) ## package required to run geobr
 

## Setting the working environment where the required files to run the analysis below should be storage 

setwd("")

## Figure 1 (main text): Number of new households, health facilities and schools connected to 
## the electrical grid through the LPT program (2004-2015)
df_lpt <- read.csv("df_figure1_main.csv", sep =",", header = TRUE)
df_lpt$year <- factor(df_lpt$year)
p1 <- ggplot(df_lpt, aes(x=year, y=number_k, group=Group)) +
  geom_line(aes(linetype=Group))+
  geom_point()+ theme_minimal() + ylim(0, max(320)) + 
  ylab("# of connections to the electrical grid (thousands)") + xlab("Year") + 
  theme(legend.position="bottom") 
p1 + theme(legend.title = element_blank())


## Figure 2 (main text): Graphical representation of the impact of the LPT on several outcomes (Panels A-D) 
## + BW est. (h) sensitiveness (Panels E-H) of FRD estimates reported in Table 3 (main text)

## Panels A-H (Figure 1 in the main text) 
df_eseb <- read.csv("df_figure2_main.csv", sep =",", header = TRUE)

## Panel A (voted for PT in 2006)
p1 <- ggplot(df_eseb, 
       aes(x = light_00  , 
           y = first_2006, 
           color = factor(treat))) + 
  geom_point(alpha = 0.1, na.rm = TRUE, show.legend = FALSE) +
  labs(x = "% electricity in 2000", 
       y = "Voted PT (2006)") +
  geom_smooth(method = "loess", level = 0.90, na.rm = TRUE, show.legend = FALSE,) + 
  scale_color_manual(name = "",
                     values = c("#00BFC4","#F8766D"),
                     labels = c("Control", "Treatment")) +
                     geom_vline(xintercept = 85, linetype = "dotted") +
                      theme_minimal() 
p1
                     
  
## Panel B (voted for PT in 2010)
p2 <- ggplot(df_eseb, 
             aes(x = light_00  , 
                 y = first_2010, 
                 color = factor(treat))) + 
  geom_point(alpha = 0.1, na.rm = TRUE, show.legend = FALSE) +
  labs(x = "% electricity in 2000", 
       y = "Voted PT (2010)") +
  geom_smooth(method = "loess", level = 0.80, na.rm = TRUE, show.legend = FALSE,) + # instead of lm, we use loess. See the difference? try with lm
  scale_color_manual(name = "",
                     values = c("#00BFC4","#F8766D"),
                     labels = c("Control", "Treatment")) +
  geom_vline(xintercept = 85, linetype = "dotted") +
  theme_minimal() 
p2


## Panel C (Satisfaction with the primary/secondary education in 2010)
p3 <- ggplot(df_eseb, 
             aes(x = light_00  , 
                 y = basic_educ, 
                 color = factor(treat))) + 
  geom_point(alpha = 0.1, na.rm = TRUE, show.legend = FALSE) +
  labs(x = "% electricity in 2000", 
       y = "Primary education") +
  geom_smooth(method = "loess", level = 0.90, na.rm = TRUE, show.legend = FALSE,) + # instead of lm, we use loess. See the difference? try with lm
  scale_color_manual(name = "",
                     values = c("#00BFC4","#F8766D"),
                     labels = c("Control", "Treatment")) +
  geom_vline(xintercept = 85, linetype = "dotted") +
  theme_minimal() 
p3


## Panel D (Satisfaction with the tertiary education in 2010)
p4 <- ggplot(df_eseb, 
             aes(x = light_00  , 
                 y =  advanced_educ, 
                 color = factor(treat))) + 
  geom_point(alpha = 0.1, na.rm = TRUE, show.legend = FALSE) +
  labs(x = "% electricity in 2000", 
       y = "Tertiary education") +
  geom_smooth(method = "loess", level = 0.90, na.rm = TRUE, show.legend = FALSE,) + # instead of lm, we use loess. See the difference? try with lm
  scale_color_manual(name = "",
                     values = c("#00BFC4","#F8766D"),
                     labels = c("Control", "Treatment")) +
  geom_vline(xintercept = 85, linetype = "dotted") +
  theme_minimal() 
p4


############################################
### BW (h) sensitivity tests (panels E-H)
############################################

## Panel E (sensitiveness test for model 1 in table 3 in the main text)
df_bw <- read.csv("df_panelE_figure2_main.csv", sep =",", header = TRUE)
p5 <- ggplot(df_bw, aes(bw_h, coef)) + 
  geom_point(colour = "black", fill = "white", size = 1, stroke = 1) + 
  theme_minimal() + ylab("Estimate") + xlab(" BW est. (h)") +
  ylim(-3, max(12)) + xlim(7, max(10.2)) +
  geom_hline(yintercept = 0, linetype="dotted",
      color="red") + geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  
  ggtitle("",
          subtitle = "")
p5


## Panel F (sensitiveness test for model 2 in table 3 in the main text)
df_bw2 <- read.csv("df_panelF_figure2_main.csv", sep =",", header = TRUE)

 p6<- ggplot(df_bw2, aes(bw_h, coef)) + 
   geom_point(colour = "black", fill = "white", size = 1, stroke = 1) + 
   theme_minimal() + ylab("Estimate") + xlab(" BW est. (h)") +
  ylim(-3, max(12)) + xlim(7, max(10.5)) +
   geom_hline(yintercept = 0, linetype="dotted",
        color="red") + geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  ggtitle("",
          subtitle = "")
p6


## Panel G (sensitiveness test for model 3 in table 3 in the main text)
df_bw3 <- read.csv("df_panelG_figure2_main.csv", sep =",", header = TRUE)
p7 <- ggplot(df_bw3, aes(bw_h, coef)) + 
  geom_point(colour = "black", fill = "white", size = 1, stroke = 1) + 
  theme_minimal() + ylab("Estimate") + xlab(" BW est. (h)") +
  ylim(-3, max(12)) + xlim(4, max(7.5)) +
  geom_hline(yintercept = 0, linetype="dotted",
  color="red") + geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  ggtitle("",
          subtitle = "")
p7


## Panel H (sensitiveness test for model 4 in table 3 in the main text)
df_bw4 <- read.csv("df_panelH_figure2_main.csv", sep =",", header = TRUE)
p8 <- ggplot(df_bw4, aes(bw_h, coef)) + 
  geom_point() + theme_minimal() + ylab("Estimate") + xlab(" BW est. (h)") +
  ylim(-3, max(12)) + xlim(4, max(7.5)) +
 geom_hline(yintercept = 0, linetype="dotted",
    color="red") + geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  ggtitle("",
          subtitle = "")
p8



Figure2_main <- ggarrange(p1, p2, p3, p4, p5, p6,p7, p8,
                    labels = c("A", "B", "C", "D", "E", "F",
                               "G", "H"),
                    ncol = 4, nrow = 2)
Figure2_main



## Figure 1 (online appendix A): 
## Percentage of households with electricity in the Brazilian municipalities (2000 and 2010)

### Loading map matrix for the whole country. This might take a few seconds.

## Brazilian municipalities in 2000
all_muni_00 <- read_municipality(year= 2000)
all_muni_00$code_muni <- factor(all_muni_00$code_muni)

## Brazilian municipalities in 2010
all_muni_10 <- read_municipality(year= 2010)
all_muni_10$code_muni <- factor(all_muni_10$code_muni)



### Data on electricity provision (from Brazilian census, 2000 and 2010)
df_cov <- read.csv("df_electricity_cov_00_10.csv", sep =",", header = TRUE)
df_cov$code_muni <- factor(df_cov$code_muni)

data_mapa_00 <- all_muni_00 %>% 
  left_join(df_cov, by = "code_muni")

data_mapa_10 <- all_muni_10 %>% 
  left_join(df_cov, by = "code_muni")


## It may take a few seconds until the plot appears
p1 <- ggplot() +
  geom_sf(data=data_mapa_00, aes(fill=elecov_00), color= "gray", size=.10) +
  labs(subtitle="2000", size=10) +
  scale_fill_distiller(palette = "Greys", name="% households with electricity", limits = c(0,100)) +
  theme_minimal() 
p1_2 <- p1 + theme(legend.position="bottom")
p1_2


## It may take a few seconds until the plot appears
p2 <- ggplot() +
  geom_sf(data=data_mapa_10, aes(fill=elecov_10), color= "gray", size=.10) +
  labs(subtitle="2010", size=10) +
  scale_fill_distiller(palette = "Greys", name="% households with electricity", limits = c(0,100)) +
  theme_minimal()
p2_2 <- p2 + theme(legend.position="bottom") 
p2_2



Figure1_online <- ggarrange(p1_2, p1_2 ,
                          labels = c("A", "B"),
                          ncol = 2, nrow = 1)
Figure1_online




## Figure 2 (online appendix)
## RD plot of the first-stage in FRD models: the probability of being an LPT beneficiary 
#given the value of the running variable -- i.e., the percentage of households with electricity in 2000

df_eseb <- read.csv("df_figure2_appendix.csv", sep =",", header = TRUE)

pfirst <- ggplot(df_eseb, 
             aes(x = light_00  , 
                 y = LPT_benef, 
                 color = factor(treat))) + 
  geom_point(alpha = 0.1, na.rm = TRUE, show.legend = FALSE) +
  labs(x = "% electricity in 2000", 
       y = "Is an LPT beneficiary?") +
  geom_smooth(method = "loess", level = 0.90, na.rm = TRUE, show.legend = FALSE,) + 
  scale_color_manual(name = "",
                     values = c("#00BFC4","#F8766D"),
                     labels = c("Control", "Treatment")) +
  geom_vline(xintercept = 85, linetype = "dotted") +
  theme_minimal() 
pfirst








              
                
























  
  


